# -*- coding: utf-8 -*-
"""LDA.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/15HA5CNJiA_Ujd10_lxbnoApLKCEYJ6Dc
"""

import numpy as np
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris dataset and unpack the pieces used by the rest of the script
iris = load_iris()
X, y = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names

# Summarize the raw data (dimensions, feature labels, class labels) before preprocessing
for caption, value in (
    ("Original Data Shape (X):", X.shape),
    ("Original Target Shape (y):", y.shape),
    ("Feature Names:", feature_names),
    ("Target Names:", target_names),
):
    print(caption, value)

# Hold out 30% of the samples for testing. Stratifying on y keeps the class
# proportions equal in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

print("\nTraining Data Shape (X_train):", X_train.shape)
print("Testing Data Shape (X_test):", X_test.shape)

# Standardize the data: learn the scaling statistics from the training
# partition only, then apply that same fitted scaler to both partitions so
# the test set never influences the preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("\nFirst 5 rows of scaled training data:\n", X_train_scaled[:5])

# Apply LDA
# n_components can be an integer of at most min(n_classes - 1, n_features),
# or None (default), which keeps all possible components.
# For Iris (3 classes), max components = 2.
lda = LinearDiscriminantAnalysis(n_components=2)  # Reduce to 2D for visualization

# LDA is supervised, so fitting needs the labels; the test set is only projected
# with the already-fitted model.
X_train_lda = lda.fit_transform(X_train_scaled, y_train)
X_test_lda = lda.transform(X_test_scaled)

print("\nTransformed Training Data Shape (LDA to 2D):", X_train_lda.shape)
print("First 5 rows of LDA transformed training data:\n", X_train_lda[:5])

# The explained variance ratio in LDA indicates how much of the class separability
# is captured by each discriminant.
print("\nExplained variance ratio by each discriminant:", lda.explained_variance_ratio_)
print("Total explained variance (class separability):", np.sum(lda.explained_variance_ratio_))

# lda.coef_ has shape (n_classes, n_features): these are the weight vectors of the
# linear classifier (decision functions), one row per class. They are NOT the
# discriminant directions used by transform(), so they are labelled accordingly.
print("\nClassifier weight vectors (coef_, one row per class):\n", lda.coef_)

# The projection applied by transform() is stored in lda.scalings_; keep only the
# columns that were actually retained in the projected data.
print("\nLinear discriminant directions (scalings_, one column per discriminant):\n",
      lda.scalings_[:, :X_train_lda.shape[1]])

# The class means in the (scaled) input feature space, i.e. before projection
print("\nClass means in original feature space (scaled):\n", lda.means_)

# Evaluate the fitted LDA classifier on the held-out test set
y_test_pred = lda.predict(X_test_scaled)
print("\nTest set accuracy:", accuracy_score(y_test, y_test_pred))

# Visualize the LDA transformed training and testing data
colors = ['navy', 'turquoise', 'darkorange']


def _plot_lda_projection(projected, labels, title):
    """Scatter-plot a 2D LDA projection with one color per class.

    Args:
        projected: Array whose first two columns are the discriminant
            coordinates of each sample.
        labels: Integer class label of each sample, aligned with the rows
            of ``projected``.
        title: Text shown above the plot.
    """
    plt.figure(figsize=(10, 8))
    # Class labels are positions in target_names, so enumerate yields them
    # directly instead of relying on a hard-coded [0, 1, 2].
    for class_index, (color, target_name) in enumerate(zip(colors, target_names)):
        in_class = labels == class_index
        plt.scatter(projected[in_class, 0], projected[in_class, 1],
                    color=color, alpha=.8, lw=2, label=target_name)

    plt.title(title)
    plt.xlabel('Linear Discriminant 1')
    plt.ylabel('Linear Discriminant 2')
    plt.legend(loc='best', shadow=False, scatterpoints=1)
    plt.grid(True)
    plt.show()


# Same plot for both partitions, so the class separation learned on the
# training data can be compared with how the unseen test samples fall.
_plot_lda_projection(X_train_lda, y_train, 'LDA of Iris dataset (Training Data)')
_plot_lda_projection(X_test_lda, y_test, 'LDA of Iris dataset (Testing Data)')